package com.fslq.pipeline;

import com.alibaba.fastjson.JSON;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fslq.pojo.SchoolInfo;
import com.fslq.service.SchoolService;
import org.apache.catalina.mapper.Mapper;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.json.JsonParseException;
import org.springframework.boot.json.JsonParser;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import static com.fslq.task.AppProcessor.MAPPER;
import static java.lang.Thread.sleep;

/**
 * Pipeline that collects detail data for every known school and stores it.
 *
 * <p>On each {@link #process} call it asks {@link SchoolService#findAllSchoolId()} for the
 * school ids, downloads {@code info.json} and {@code vote.json} for each id through a
 * headless Chrome session, maps the JSON onto {@link SchoolInfo} objects and hands the
 * collected list to {@link SchoolService#saveAllInfo}.
 */
@Component
public class SchoolInfoDataPipeline implements Pipeline {

    /** Keys read from vote.json, in the order {@link #buildSchoolInfo} consumes the scores. */
    private static final String[] VOTE_KEYS = {"0", "1", "2", "5"};

    /** Seconds to wait for the {@code <body>} element of a fetched page. */
    private static final long PAGE_TIMEOUT_SECONDS = 50;

    @Autowired
    private SchoolService schoolService;

    /**
     * Crawls the info of all schools and persists whatever could be collected.
     *
     * <p>Neither {@code resultItems} nor {@code task} is read by this implementation.
     * A failure on a single school is reported and the crawl continues with the next id;
     * the rows gathered so far are always saved, even if the crawl stops early.
     *
     * @param resultItems items produced by the page processor (unused here)
     * @param task        the running crawl task (unused here)
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        WebDriver driver = new ChromeDriver(new ChromeOptions().addArguments("--headless"));
        List<SchoolInfo> lists = new ArrayList<>();
        try {
            List<Integer> allSchoolId = schoolService.findAllSchoolId();
            int index = 0;
            if (allSchoolId != null) {
                for (Integer schoolId : allSchoolId) {
                    System.out.println("学校编号：" + index++);
                    try {
                        lists.add(findInfo(driver, schoolId));
                    } catch (InterruptedException e) {
                        System.out.println("异常：" + e.getMessage());
                        // Restore the flag so callers can see the interruption, then stop crawling.
                        Thread.currentThread().interrupt();
                        break;
                    } catch (JsonProcessingException | RuntimeException e) {
                        // One broken school must not abort the remaining ones.
                        System.out.println("异常：school_id=" + schoolId + " " + e);
                        e.printStackTrace();
                    }
                }
            }
        } catch (Exception e) {
            // Print the exception itself: getMessage() is null for many exception types.
            System.out.println(e);
        } finally {
            try {
                // quit() shuts down the browser and the driver process; close() would only close the window.
                driver.quit();
            } catch (RuntimeException e) {
                // A failing shutdown must not prevent the collected rows from being saved.
                System.out.println("异常：" + e);
            }
            schoolService.saveAllInfo(lists);
        }
    }

    /**
     * Downloads and parses the detail record and the vote scores of one school.
     *
     * <p>Source: {@code https://static-data.eol.cn/www/2.0/school/{id}/info.json}. Fields read
     * from its {@code data} object: {@code f985}, {@code f211}, {@code num_subject} (key
     * disciplines), {@code num_master} (master programmes), {@code num_doctor} (doctoral
     * programmes), {@code num_library} (e.g. {@code "460万"}), {@code num_lab} (key labs),
     * {@code create_date}, {@code area}, {@code dual_class_name}, {@code school_site},
     * {@code phone}, {@code content}, and {@code video.img_url}.
     *
     * @param driver browser session used to fetch the pages
     * @param s_id   school id
     * @return a {@link SchoolInfo} carrying the id plus every field that could be parsed
     * @throws InterruptedException    declared for callers; kept for interface compatibility
     * @throws JsonProcessingException if a response body is not valid JSON
     */
    public SchoolInfo findInfo(WebDriver driver, Integer s_id) throws InterruptedException, JsonProcessingException {
        String text = fetchBodyText(driver, "https://static-data.eol.cn/www/2.0/school/" + s_id + "/info.json");
        JsonNode root = MAPPER.readTree(text);
        JsonNode data = root == null ? null : root.get("data");

        HashMap<String, Object> map = null;
        HashMap<String, Object> map_t = null;
        if (data != null && data.isObject()) {
            map = toMap(data);
            // "video" is an object when present; an array (empty or not) carries no image url.
            Object video = map.get("video");
            if (video instanceof java.util.Map) {
                map_t = toMap(video);
            }
        }
        List<Float> vote = findVote(driver, s_id);
        return buildSchoolInfo(map, map_t, vote, s_id);
    }

    /**
     * Downloads the vote scores of one school.
     *
     * <p>Source: {@code https://static-gkcx.eol.cn/www/school/{id}/vote/vote.json}. Keys read:
     * {@code "0"} learning index, {@code "1"} overall score, {@code "2"} living index,
     * {@code "5"} employment index.
     *
     * @return exactly four scores in the order above, or an empty list when the body is not
     *         a JSON object or any of the four scores is missing or not a number
     */
    private List<Float> findVote(WebDriver driver, Integer s_id) throws InterruptedException, JsonProcessingException {
        String text = fetchBodyText(driver, "https://static-gkcx.eol.cn/www/school/" + s_id + "/vote/vote.json");
        List<Float> scores = new ArrayList<>();
        if (!isJsonForm(text)) {
            return scores;
        }
        JsonNode votes = MAPPER.readTree(text);
        if (votes == null || !votes.isObject()) {
            return scores;
        }
        for (String key : VOTE_KEYS) {
            JsonNode value = votes.get(key);
            Float score = (value == null || value.isNull()) ? null : parseFloatOrNull(value.asText());
            if (score == null) {
                // All-or-nothing: consumers index the list by position, so never return a partial list.
                return new ArrayList<>();
            }
            scores.add(score);
        }
        return scores;
    }

    /**
     * Tells whether fastjson can parse the given text.
     *
     * @param json text to check
     * @return {@code true} if {@code JSON.parse} completes without throwing, {@code false} otherwise
     */
    public static boolean isJsonForm(String json) {
        try {
            JSON.parse(json);
            return true;
        } catch (Exception e) {
            return false;
        }
    }

    /**
     * Maps the parsed JSON data onto a new {@link SchoolInfo}.
     *
     * <p>Missing, null or unparsable fields are skipped instead of failing the whole record.
     *
     * @param map   fields of the {@code data} object of info.json; may be {@code null} or empty
     * @param map_t fields of the {@code video} object; may be {@code null}
     * @param vote  scores as returned by {@link #findVote}; ignored unless it has at least four entries
     * @param s_id  school id stored on the result
     */
    private SchoolInfo buildSchoolInfo(HashMap<String, Object> map, HashMap<String, Object> map_t, List<Float> vote, Integer s_id) {
        SchoolInfo schoolInfo = new SchoolInfo();
        schoolInfo.setSchoolId(s_id);
        if (map != null && !map.isEmpty()) {
            schoolInfo.setF985(textOf(map, "f985"));
            schoolInfo.setF211(textOf(map, "f211"));
            schoolInfo.setDualClassName(textOf(map, "dual_class_name"));
            schoolInfo.setSchoolSite(textOf(map, "school_site"));
            schoolInfo.setPhone(textOf(map, "phone"));
            schoolInfo.setContent(textOf(map, "content"));

            Long numSubject = parseLongOrNull(textOf(map, "num_subject"));
            if (numSubject != null) {
                schoolInfo.setNumSubject(numSubject.longValue());
            }
            Long numMaster = parseLongOrNull(textOf(map, "num_master"));
            if (numMaster != null) {
                schoolInfo.setNumMaster(numMaster.longValue());
            }
            Long numDoctor = parseLongOrNull(textOf(map, "num_doctor"));
            if (numDoctor != null) {
                schoolInfo.setNumDoctor(numDoctor.longValue());
            }
            Long numLab = parseLongOrNull(textOf(map, "num_lab"));
            if (numLab != null) {
                schoolInfo.setNumLab(numLab.longValue());
            }
            // num_library carries a unit suffix (e.g. "460万"); only the leading digits are stored.
            Long numLibrary = parseLongOrNull(leadingDigits(textOf(map, "num_library")));
            if (numLibrary != null) {
                schoolInfo.setNumLibrary(numLibrary.longValue());
            }
            Long createDate = parseLongOrNull(textOf(map, "create_date"));
            if (createDate != null) {
                schoolInfo.setCreateDate(createDate.longValue());
            }
            Float area = parseFloatOrNull(textOf(map, "area"));
            if (area != null) {
                schoolInfo.setArea(area.floatValue());
            }
        }
        if (vote != null && vote.size() >= VOTE_KEYS.length) {
            schoolInfo.setLearningIndex(vote.get(0));
            schoolInfo.setOverallScore(vote.get(1));
            schoolInfo.setLiveIndex(vote.get(2));
            schoolInfo.setJobIndex(vote.get(3));
        }
        if (map_t != null) {
            schoolInfo.setImgUrl(textOf(map_t, "img_url"));
        }
        return schoolInfo;
    }

    /** Loads {@code url} in the browser and returns the text of the page's {@code <body>} element. */
    private String fetchBodyText(WebDriver driver, String url) {
        driver.get(url);
        WebDriverWait wait = new WebDriverWait(driver, PAGE_TIMEOUT_SECONDS);
        return wait.until(ExpectedConditions.presenceOfElementLocated(By.xpath("/html/body"))).getText();
    }

    /** Converts a JSON object (tree node or nested map) into a {@code HashMap} keyed by field name. */
    @SuppressWarnings("unchecked") // JSON object keys are always strings
    private static HashMap<String, Object> toMap(Object value) {
        return MAPPER.convertValue(value, HashMap.class);
    }

    /** Returns the string form of {@code map[key]}, or {@code null} when the key is absent or null. */
    private static String textOf(HashMap<String, Object> map, String key) {
        Object value = map.get(key);
        return value == null ? null : value.toString();
    }

    /** Returns the run of ASCII digits at the start of {@code text}; {@code null} for {@code null} input. */
    private static String leadingDigits(String text) {
        if (text == null) {
            return null;
        }
        int end = 0;
        while (end < text.length() && text.charAt(end) >= '0' && text.charAt(end) <= '9') {
            end++;
        }
        return text.substring(0, end);
    }

    /** Parses {@code text} as a long; returns {@code null} when it is null, blank or not a number. */
    private static Long parseLongOrNull(String text) {
        if (text == null || text.trim().isEmpty()) {
            return null;
        }
        try {
            return Long.parseLong(text.trim());
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /** Parses {@code text} as a float; returns {@code null} when it is null, blank or not a number. */
    private static Float parseFloatOrNull(String text) {
        if (text == null || text.trim().isEmpty()) {
            return null;
        }
        try {
            return Float.parseFloat(text.trim());
        } catch (NumberFormatException e) {
            return null;
        }
    }
}
